## Load libraries
library(covid19)
library(ggplot2)
library(lubridate)
library(dplyr)
library(ggplot2)
library(sp)
library(raster)
library(viridis)
library(ggthemes)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
# Map data preparation
if(!'map.RData' %in% dir()){
  esp1 <- getData(name = 'GADM', country = 'ESP', level = 1)
# Remove canary
esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',]
espf <- fortify(esp1, region = 'NAME_1')

# Standardize names
# Convert names
map_names <- esp1@data$NAME_1
data_names <- sort(unique(esp_df$ccaa))
names_df <- tibble(NAME_1 = c('Andalucía',
 'Aragón',
 'Cantabria',
 'Castilla-La Mancha',
 'Castilla y León',
 'Cataluña',
 'Ceuta y Melilla',
 'Comunidad de Madrid',
 'Comunidad Foral de Navarra',
 'Comunidad Valenciana',
 'Extremadura',
 'Galicia',
 'Islas Baleares',
 'La Rioja',
 'País Vasco',
 'Principado de Asturias',
 'Región de Murcia'),
 ccaa = c('Andalucía',
 'Aragón',
 'Cantabria',
 'CLM',
 'CyL',
 'Cataluña',
 'Melilla',
 'Madrid',
 'Navarra',
 'C. Valenciana',
 'Extremadura',
 'Galicia',
 'Baleares',
 'La Rioja',
 'País Vasco',
 'Asturias',
 'Murcia'))


espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df)
centroids <- data.frame(coordinates(esp1))
names(centroids) <- c('long', 'lat')
centroids$NAME_1 <- esp1$NAME_1
centroids <- centroids %>% left_join(names_df)

# Get random sampling points

  random_list <- list()
  for(i in 1:nrow(esp1)){
    message(i)
    shp <- esp1[i,]
    # bb <- bbox(shp)
    this_ccaa <- esp1@data$NAME_1[i]
    # xs <- runif(n = 500, min = bb[1,1], max = bb[1,2])
    # ys <- runif(n = 500, min = bb[2,1], max = bb[2,2])
    # random_points <- expand.grid(long = xs, lat = ys) %>%
    #   mutate(x = long,
    #          y = lat)
    # coordinates(random_points) <- ~x+y
    # proj4string(random_points) <- proj4string(shp)
    # get ccaa
    message('getting locations of randomly generated points')
    # polys <- over(random_points,polygons(shp))
    # polys <- as.numeric(polys)
    random_points <- spsample(shp, n = 20000, type = 'random')
    random_points <- data.frame(random_points)
    random_points$NAME_1 <-  this_ccaa
    random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1)
    random_list[[i]] <- random_points
  }
  random_points <- bind_rows(random_list)
  random_points <- random_points %>% mutate(long = x,
                                            lat = y)

save(espf,
     esp1,
     names_df,
     centroids,
     random_points,
     file = 'map.RData')
} else {
  load('map.RData')
}

# Define a function for adding zerio
add_zero <- function (x, n) {
    x <- as.character(x)
    adders <- n - nchar(x)
    adders <- ifelse(adders < 0, 0, adders)
    for (i in 1:length(x)) {
      if (!is.na(x[i])) {
        x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""), 
                       x[i], collapse = "")
      }
    }
    return(x)
}
options(scipen = '999')

Maps of Spain

make_map <- function(var = 'deaths',
                     data = NULL,
                     pop = FALSE,
                     pop_factor = 100000,
                     points = FALSE,
                     line_color = 'white',
                     add_names = T,
                     add_values = T,
                     text_size = 2.7){
  
  if(is.null(data)){
    data <- esp_df
  }

  left <- espf
  right <- data[,c('ccaa', paste0(var, '_non_cum'))]
  

  names(right)[ncol(right)] <- 'var'
  right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T))
  
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map <- left_join(left, right)
  
  if(points){
    the_points <- centroids %>%
      left_join(right)
    g <- ggplot() +
      geom_polygon(data = map,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'black',
         color = line_color,
         lwd = 0.4, alpha = 0.8) +
      geom_point(data = the_points,
                 aes(x = long,
                     y = lat,
                     size = var),
                 color = 'red',
                 alpha = 0.7) +
      scale_size_area(name = '', max_size = 20)
  } else {
    # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
    cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues')
    g <- ggplot(data = map,
         aes(x = long,
             y = lat,
             group = group)) +
    geom_polygon(aes(fill = var),
                 lwd = 0.3,
                 color = line_color) +
      scale_fill_gradientn(name = '',
                           colours = cols)
    # scale_fill_viridis(name = '' ,option = 'magma',
    #                    direction = -1) 
  }
  
  # Add names?
  if(add_names){
    centy <- centroids %>% left_join(right)
    if(add_values){
      centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')')
    } else {
      centy$label <- centy$ccaa
    }

    g <- g +
      geom_text(data = centy,
                aes(x = long,
                    y = lat,
                    label = label,
                    group = ccaa),
                alpha = 0.7,
                size = text_size)
  }
  
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(data$date))) +
    theme(legend.position = 'right')
  
}


make_dot_map <- function(var = 'deaths',
                     date = NULL,
                     pop = FALSE,
                     pop_factor = 100,
                     point_factor = 1,
                     points = FALSE,
                     point_color = 'darkred',
                     point_size = 0.6,
                     point_alpha = 0.5){
  
  
  if(is.null(date)){
    the_date <- max(esp_df$date)
  } else {
    the_date <- date
  }
    right <- esp_df[esp_df$date == the_date,c('ccaa', var)]
   names(right)[ncol(right)] <- 'var'
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map_data <- esp1@data %>%
    left_join(names_df) %>%
      left_join(right)
  map_data$var <- map_data$var / point_factor
  out_list <- list()
  for(i in 1:nrow(map_data)){
    sub_data <- map_data[i,]
    this_value = round(sub_data$var)

    if(this_value >= 1){
      this_ccaa = sub_data$ccaa
      # get some points
      sub_points <- random_points %>% filter(ccaa == this_ccaa)
      sampled_points <- sub_points %>% dplyr::sample_n(this_value)
      out_list[[i]] <- sampled_points
    }
  }
  the_points <- bind_rows(out_list)
  
  g <- ggplot() +
    geom_polygon(data = espf,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'white',
         color = 'black',
         lwd = 0.4, alpha = 0.8) +
    geom_point(data = the_points,
               aes(x = long,
                   y = lat),
               color = point_color,
               size = point_size,
               alpha = point_alpha)
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(esp_df$date)))
  
}

Deaths

Absolute number of deaths: points

make_map(var = 'deaths',
       points = T) +
  labs(title = 'Number of deaths')

if(!dir.exists('high_quality')){
  dir.create('high_quality')
}
ggsave('high_quality/points_death_absolute.pdf', width = 8, height = 6)

Absolute number of deaths: choropleth

make_map(var = 'deaths',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of deaths')

ggsave('high_quality/polygons_death_absolute.pdf', width = 8, height = 6)

Number of deaths adjusted by population: points

make_map(var = 'deaths', pop = TRUE, points = T) +
  labs(title = 'Number of deaths per 100,000')

ggsave('high_quality/points_death_adjusted.pdf', width = 8, height = 6)

Number of deaths adjusted by population: polygons

make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') +
  labs(title = 'Number of deaths per 100,000')

ggsave('high_quality/polygons_death_adjusted.pdf', width = 8, height = 6)

Number of deaths: 1 dot per death

make_dot_map(var = 'deaths', point_size = 0.15) +
  labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location')

ggsave('high_quality/point_per_death.pdf', width = 8, height = 6)

Cases

Absolute number of cases: points

make_map(var = 'cases',
       points = T) +
  labs(title = 'Number of confirmed cases')

ggsave('high_quality/points_case_absolute.pdf', width = 8, height = 6)

Absolute number of cases: choropleth

make_map(var = 'cases',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of confirmed cases')

ggsave('high_quality/polygon_case_absolute.pdf', width = 8, height = 6)

Number of cases adjusted by population: points

make_map(var = 'cases', pop = TRUE, points = T) +
  labs(title = 'Number of confirmed cases per 100,000')

ggsave('high_quality/points_case_adjusted.pdf', width = 8, height = 6)

Number of cases adjusted by population: polygons

make_map(var = 'cases', pop = TRUE, points = F,
         line_color = 'darkgrey') +
  labs(title = 'Number of confirmed cases per 100,000')

ggsave('high_quality/polygons_case_adjusted.pdf', width = 8, height = 6)

Number of cases adjusted by population: polygons: JUST 7 MOST RECENT DAYS

keep_dates <- as.Date(seq((max(esp_df$date)-6), max(esp_df$date), by = 1))
data <- esp_df %>% filter(date %in% keep_dates)
make_map(var = 'cases', data = data, pop = TRUE, points = F,
         line_color = 'darkgrey') +
  labs(title = 'Number of confirmed cases per 100,000',
       caption = paste0('Cumulative incidence over the week from ',
                        min(keep_dates), ' through ', 
                        max(keep_dates)))

ggsave('high_quality/polygons_case_adjusted_week.pdf', width = 8, height = 6)

Number of cases: points

make_dot_map(var = 'cases',
             point_size = 0.05, point_alpha = 0.5, point_factor = 10) +
  labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location')

ggsave('high_quality/point_per_10_cases.pdf', width = 8, height = 6)

Numbers for manuscript

Calculate cumulative incidence per 1.000.000 per week

keep_dates <- as.Date(seq((max(esp_df$date)-6), max(esp_df$date), by = 1))
x <- esp_df %>%
  filter(date %in% keep_dates) %>%
  group_by(ccaa) %>%
  summarise(incident_cases_over_7_days = sum(cases_non_cum)) %>%
  ungroup %>%
  left_join(esp_pop) %>%
  mutate(incident_cases_over_7_days_per_10e4 = (incident_cases_over_7_days / pop) * 10e4) %>%
  arrange(desc(incident_cases_over_7_days_per_10e4))
kable(x)
ccaa incident_cases_over_7_days pop incident_cases_over_7_days_per_10e4
La Rioja 963 316798 303.97919
CLM 4785 2032863 235.38232
Madrid 14907 6663394 223.71482
Navarra 1062 654214 162.33220
Cataluña 11006 7675217 143.39660
CyL 3335 2399548 138.98451
País Vasco 2888 2207776 130.81037
Aragón 1374 1319291 104.14685
Galicia 2805 2699499 103.90817
Ceuta 62 84777 73.13304
Cantabria 418 581078 71.93527
Extremadura 591 1067710 55.35211
Asturias 517 1022800 50.54752
C. Valenciana 2400 5003769 47.96384
Andalucía 3619 8414240 43.01042
Melilla 35 86487 40.46851
Baleares 335 1149460 29.14412
Murcia 363 1493898 24.29885
Canarias 497 2153389 23.07990